/*******************************************************************************

Kilimanjaro make_ViltoAV.do

2018.07.15 (DJ) Created

This do file:
(i) constructs all villages-all agrovets pair with distances, 
(ii) calculates travel-adjusted input prices from each village to each agrovet. 
(iii) for each village, it finds the minimum travel-adj input prices
(iv) collapses the village-agrovet level data into village-level data 
	 with best travel-adjusted input prices. 

The output data set is the village-level <Kilimanjaro ViltoAV.dta>.

*******************************************************************************/

		
		
/*******************************************************************************
*******************************************************************************

	Construct data sets with the Missing Census Agrovets

*******************************************************************************
*******************************************************************************/
		
	** Step (1): make a pair for each village-agrovet shop
		use "${analysis}/Kilimanjaro agrovet_shop_level_censusappended.dta", clear	
		
		keep av_ownerid av_shopid av_district av_ward av_village ///
			 sellfert sellurea sellDAP sellseeds av_*price* av_*qty* av_*rev* av_${type}_17_median_imput
		gen to_merge = 1
		tempfile avshoplist
		save `avshoplist'
		
		use "${analysis}/Kilimanjaro road_quality_census.dta", replace
		keep village_name ward district
		merge 1:1 village_name ward district using "${analysis}/Kilimanjaro Vil_distance_to_borders.dta"
		drop _merge
		gen to_merge = 1
		tempfile villist
		save `villist'
		
		joinby to_merge using `avshoplist'
		drop to_merge
	
		gen has_agrovet_village = 0
		replace has_agrovet_village = 1 if (village_name == av_village) & (ward == av_ward) & (district == av_district)
		tempfile vilavpair
		save `vilavpair'
		
		// calculate distances from each village to each agrovet using the routing method
			/*
			* google distances
			use "${analysis}/Kilimanjaro ViltoVil.dta", clear
			keep district_origin ward_origin village_name_origin district_dest ward_dest village_name_dest distance_km duration_hrs
			rename (district_origin ward_origin village_name_origin district_dest ward_dest village_name_dest) ///
				   (district ward village_name av_district av_ward av_village)
			tempfile avgoogle
			save `avgoogle'
		
			use `vilavpair', clear
			merge m:1 district ward village_name av_district av_ward av_village using `avgoogle'
			/*
			Result                           # of obs.
			-----------------------------------------
			not matched                       206,733
				from master                    23,470  (_merge==1) <-- see what we can do here later.
				from using                    183,263  (_merge==2)
		
			matched                           233,600  (_merge==3)
			-----------------------------------------
			*/
			drop if _m==2
			drop _merge
			rename (distance_km duration_hrs) (google_vil_AVvil_km google_vil_AVvil_hrs)
			tempfile vilpair_googledist
			save `vilpair_googledist'
			*/
		
		** Bring in travel costs directly measured from Transport Surveys
			use `ViltoMkt_KMCOST', clear
			keep if survey_region_O == "KILIMANJARO" & survey_region_D == "KILIMANJARO"
			rename (survey_district_O survey_ward_O survey_village_O survey_district_D survey_ward_D survey_village_D) ///
				   (district ward village_name av_district av_ward av_village)
			keep district ward village_name av_district av_ward av_village DIST_*
			tempfile measuredcost
			save `measuredcost'
			
			use `vilavpair', clear
			merge m:1 district ward village_name av_district av_ward av_village using `measuredcost'
			drop if _merge==2
			drop _merge
			rename DIST_km_direct google_vil_AVvil_km
		
		
		** We can compute the travel adjusted prices in three different ways
			
			global costperkm 138 // 0.06*2300 comes from Table 2 NEW. <--- *************** This is obsolete ***************
			
			global numtripeqv 2.6969 // 1trip going + 1trip coming + 0.6969 carrying. 
									// 0.6969 comes from the coefficients of hours in col(2) and (4) of Appendix Table 2
			
			//global viltovilkm DIST_km_direct
			global viltovilkm google_vil_AVvil_km
			
			* (1) Using Market Transport surveys directly and calibrated carrying cost 
			gen travel_adj_price_50kg	=	av_${type}_rprice_17 + ${viltovilkm}*${costperkm}*${numtripeqv}
			
			* (1.1) Cut the Transportation Costs in (1) by half for Alan's counterfactuals
			gen travel_adj_price_50kgM1	=	av_${type}_rprice_17 + ${viltovilkm}*${costperkm}*${numtripeqv}/2
			
			* (2) Using Alan's estimation from a trade model: Overall AVE for each bin
			gen AVE_adj_price_50kg	   = av_${type}_rprice_17 if ${viltovilkm}==0
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+0.20) if ${viltovilkm}>0 & ${viltovilkm}<=5
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+0.47) if ${viltovilkm}>5 & ${viltovilkm}<=10
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+0.78) if ${viltovilkm}>10 & ${viltovilkm}<=15
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+1.05) if ${viltovilkm}>15 & ${viltovilkm}<=20
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+1.19) if ${viltovilkm}>20 & ${viltovilkm}<=30
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+1.76) if ${viltovilkm}>30 & ${viltovilkm}<=40
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+2.18) if ${viltovilkm}>40 & ${viltovilkm}<=50
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+3.12) if ${viltovilkm}>50 & ${viltovilkm}<=100
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+5.69) if ${viltovilkm}>100 & !mi(${viltovilkm})
			
			* (2.1) Cut the Transportation Costs in (2) by half for Alan's counterfactuals
			gen AVE_adj_price_50kgM1		=	av_${type}_rprice_17 + av_${type}_rprice_17*0.04*${viltovilkm}/2
			
			* (2) Using Alan's estimation from a trade model: 4% AVE per KM
			gen AVE2_adj_price_50kg		=	av_${type}_rprice_17 + av_${type}_rprice_17*0.02*${viltovilkm}
			
			* (2.1) Cut the Transportation Costs in (2) by half for Alan's counterfactuals
			gen AVE2_adj_price_50kgM1		=	av_${type}_rprice_17 + av_${type}_rprice_17*0.02*${viltovilkm}/2
						
			* (3) Using Directly Measured Transport Surveys 
			gen DR_adj_price_50kg		=	av_${type}_rprice_17 + DIST_cost_USD_direct*2300*${numtripeqv}
	
			* (3.1) Cut the Transportation Costs in (3) by half for Alan's counterfactuals
			gen DR_adj_price_50kgM1		=	av_${type}_rprice_17 + DIST_cost_USD_direct*2300*${numtripeqv}/2
	
			* (4) Using Directly Measured Transport Surveys + Imputed with Median Prices (IM) for CDFs
			gen IM_adj_price_50kg		=	av_${type}_17_median_imput + DIST_cost_USD_direct*2300*${numtripeqv}
		
			* (4.1) Cut the Transportation Costs in (4) by half for Alan's counterfactuals
			gen IM_adj_price_50kgM1		=	av_${type}_17_median_imput + DIST_cost_USD_direct*2300*${numtripeqv}/2
	
			
			* (5) After dropping some villages near borders
			foreach dist in 3 5 10 15{
				gen DR`dist'_adj_price_50kg = av_${type}_rprice_17 + DIST_cost_USD_direct*2300*${numtripeqv} if dist_border_km>`dist'
			}
	
			* (6) Prices from the nearest shops
			egen village_id = group(village_name ward district)

			//replace ${viltovilkm} = 0 if mi(${viltovilkm})
			
 				// Nearest 1st and 2nd Agrovet Shop (i.e. 2nd village is allowed to be the same as the 1st village)
					gen mi_price_or_km = (mi(av_urea_rprice_17)|mi(${viltovilkm}))
					
					// rank by distance AND prices -> this helps to tag the agrovet in a village that has the lowest prices
					bys mi_price_or_km village_id (${viltovilkm} av_urea_rprice_17): gen rank_to_anyav = _n  if mi_price_or_km==0
					gen ${viltovilkm}_avrank1= ${viltovilkm} if rank_to_anyav==1
					gen ${viltovilkm}_avrank2= ${viltovilkm} if rank_to_anyav==2
				
				// Nearest 1st and 2nd Village (i.e. 2nd village has to be different from the 1st village)
				
					// nearest 1st and 2nd villages with agrovets that sell fertilizer
						egen tag = tag(village_id av_district av_ward av_village)
						egen villagepair_id = group(village_id av_district av_ward av_village)
						
						// create the rank at the village-AV village level and fill down 
						bys mi_price_or_km village_id : egen tempvar = rank(${viltovilkm}) if tag & mi_price_or_km==0, unique
						bys mi_price_or_km village_id villagepair_id: egen rank_to_anyvil = total(tempvar), m
						
						// distances to the nearest and second nearest village with UREA - do for UREA (as opposed to any) because that's what we use later
						gen ${viltovilkm}_rank1= ${viltovilkm} if rank_to_anyvil==1
						gen ${viltovilkm}_rank2= ${viltovilkm} if rank_to_anyvil==2
					 
						br village_id ${viltovilkm} tempvar  rank_to_anyvil
						sort village_id  ${viltovilkm} villagepair_id
						cap drop tempvar
				
					// Nearest 1st and 2nd village with agrovets that sell seeds 
						bys village_id : egen tempvar = rank(${viltovilkm}) if tag & sellseeds==1, unique
						bys village_id villagepair_id: egen rank_to_av_seeds = total(tempvar), m
						
						// distances to the nearest and second nearest village with SEEDS
						gen ${viltovilkm}_rank1_seed= ${viltovilkm} if rank_to_av_seeds==1
						gen ${viltovilkm}_rank2_seed= ${viltovilkm} if rank_to_av_seeds==2
					
				
				order village_id
				//br village_id ${viltovilkm} av_urea_rprice_17 *rank*
				sort village_id ${viltovilkm}
				
				// Construct the travel-adj price from the nearest agro-input sellers
				gen NEAR_adj_price_50kg = av_${type}_rprice_17 + DIST_cost_USD_direct*2300*${numtripeqv} if rank_to_anyav==1
				
			tempfile vil_av_long
			save `vil_av_long'
			
			
		
		
		
	** Step (3):  For each village, pick the minimum travel-cost adjusted prices 
	
		// minimum travel-cost adjusted prices among all agrovets with positive prices
		foreach var in "50kg" "50kgM1" {
			
			// BASIC TRAVEL COST
				// pick the minimum price
				bys village_id: egen min_travel_adj_price_`var'	= min(travel_adj_price_`var')
				
				// distance, travel costs, and unadjusted price to the best agrovet
				gen km_travel_adj_`var' 		= ${viltovilkm} 								if min_travel_adj_price_`var' == travel_adj_price_`var' 	& !mi(travel_adj_price_`var')
				gen tripcost_travel_adj_`var' 	= ${viltovilkm}*${costperkm}				 	if min_travel_adj_price_`var' == travel_adj_price_`var' 	& !mi(travel_adj_price_`var')
				gen unadjusted_price_`var'  	= av_${type}_rprice_17 								if min_travel_adj_price_`var' == travel_adj_price_`var' 	& !mi(travel_adj_price_`var')
		}
		
		
		foreach var in "50kg" {	
		
			// OTHER METHODS OF COMPUTING TRANSPORT COSTS
				// DIRECT TRANSPORT COSTS
				foreach method in NEAR DR AVE AVE2 DR3 DR5 DR10 DR15{
					// pick the minimum price
					bys village_id: egen min_`method'_adj_price_`var'	= min(`method'_adj_price_`var')
					bys village_id: egen min_`method'_adj_price_`var'_N	= count(`method'_adj_price_`var')
					
					// distance, travel costs, and unadjusted price to the best agrovet
					gen km_`method'_adj_`var'		    = ${viltovilkm} 			if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
					gen tripcost_`method'_adj_`var' 	= DIST_cost_USD_direct*2300	if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
					gen unadjusted_price_`var'_`method' = av_${type}_rprice_17 		if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
				}
				
				// DIRECT TRANSPORT COSTS + IMPUTED MEDIAN PRICE FOR MISSING AGROVETS
				foreach method in IM{
					// pick the minimum price
					bys village_id: egen min_`method'_adj_price_`var'	= min(`method'_adj_price_`var')
					bys village_id: egen min_`method'_adj_price_`var'_N	= count(`method'_adj_price_`var')
					
					// distance, travel costs, and unadjusted price to the best agrovet
					gen km_`method'_adj_`var'		    = ${viltovilkm} 			if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
					gen tripcost_`method'_adj_`var' 	= DIST_cost_USD_direct*2300		if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
					gen unadjusted_price_`var'_`method' = av_${type}_17_median_imput	if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
				}
				
				fsum min_*50kg
				/*
				                  Variable |        N     Mean       SD      Min      Max                                                                                                                              
				---------------------------+---------------------------------------------
				 min_travel_adj_price_50kg |   248501 48491.60  5847.76 40000.00 72722.80  
					 min_DR_adj_price_50kg |   248050 50656.62  7577.18 40000.00 96701.27  
					 min_IM_adj_price_50kg |   248050 51477.98  7212.77 42000.00 97709.24  
				*/
		}
		
		
		// minimum prices among agrovets within restricted distances (within 10km and 20km)
		pause off
		foreach var in "50kg" "50kgM1"{
			
			foreach type in travel_adj DR_adj IM_adj AVE_adj{
				foreach dist in 5 10 20{
					tempvar `type'`var'`dist'
					gen ``type'`var'`dist''= `type'_price_`var' if ${viltovilkm}<=`dist'
					bys village_id: egen min`dist'_`type'_price_`var' = min(``type'`var'`dist'')
					bys village_id: egen min`dist'_`type'_price_`var'_N = count(``type'`var'`dist'')
					
					// distances to the agrovets with minimum travel-adj prices
					gen km_`type'`dist'_`var' = ${viltovilkm} if min`dist'_`type'_price_`var' == ``type'`var'`dist'' & !mi( ``type'`var'`dist'')
				}
			}
		}	
		
		pause off
		
		// number of agrovets within X km that sell fert or seeds
			foreach dist in 5 10 20{
				tempvar fert seed fertseed
	
				// number of AV that sell seeds
				gen `seed' = (sellseeds==1 & ${viltovilkm}<=`dist') if !mi(${viltovilkm})&!mi(sellseeds)
				bys village_id: egen num_av_seed_less_`dist'km = total(`seed'), m
	
				// number of AV that sell fertilizer
				gen `fert'= (sellfert==1 & ${viltovilkm}<=`dist')  if !mi(${viltovilkm})&!mi(sellfert)
				bys village_id: egen num_av_fert_less_`dist'km = total(`fert'), m
				
				// number of AV that sell either seed or fertilizer
				gen `fertseed' = ((sellfert==1|sellseeds==1) & ${viltovilkm}<=`dist') if !mi(${viltovilkm})&!mi(sellseeds)&!mi(sellfert)
				bys village_id: egen num_av_fertseed_less_`dist'km = total(`fertseed'), m
				pause
			}
		
		// collapse to the village level
		collapse (min) vil_to_anyav_km_AV1 = ${viltovilkm}_avrank1 vil_to_anyav_km_AV2 = ${viltovilkm}_avrank2 ///
				 (min) vil_to_anyav_km_r1 = ${viltovilkm}_rank1 vil_to_anyav_km_r2 = ${viltovilkm}_rank2 ///
				 (min) vil_to_anyav_km_r1_seed = ${viltovilkm}_rank1_seed vil_to_anyav_km_r2_seed = ${viltovilkm}_rank2_seed ///
				 (min) vil_to_anyav_km = ${viltovilkm} ///
					   min* km_* tripcost_* ///
				 (mean) unadjusted_price_50kg* num_av_* ///
				 (max) has_agrovet_village, by(village_id village_name ward district)
		 
		// has any agrovet within X km (dummy variable)
		foreach km in 5 10 20{
			gen has_av_fertseed_less_`km'km = (num_av_fertseed_less_`km'km>0) if !mi(num_av_fertseed_less_`km'km)
			gen has_av_fert_less_`km'km = (num_av_fert_less_`km'km>0) if !mi(num_av_fert_less_`km'km)
			gen has_av_seed_less_`km'km = (num_av_seed_less_`km'km>0) if !mi(num_av_seed_less_`km'km)
		}
		
	
	// convert to dollars
		foreach var of varlist min*price_50kg* unadjusted_price_50kg*{
			replace `var' = `var'/2300
		}
		
		
	// generate other variables
		gen onewaycost_bestadj_av_${type}_usd = tripcost_travel_adj_50kg/2300
		gen travelcost_bestadj_av_${type}_usd = ${numtripeqv}*tripcost_travel_adj_50kg/2300
		
		foreach var in NEAR DR DR3 DR5 DR10 DR15{
			gen onewaycost_`var'_adj_av_${type}_usd = tripcost_`var'_adj_50kg/2300
			gen travelcost_`var'_adj_av_${type}_usd = ${numtripeqv}*tripcost_`var'_adj_50kg/2300
		}
	
	
	// winsorize
		foreach var of varlist vil_to_any*av_km*{
			winsor2 `var', cut(1 99) suffix(_w1) label
		}	
		
		fsum km_*
	
		fsum min_travel_adj_price_50kg min_DR_adj_price_50kg min_IM_adj_price_50kg
		/*
		                 Variable |        N     Mean       SD      Min      Max                                                                                                                              
		---------------------------+---------------------------------------------
		 min_travel_adj_price_50kg |      551    21.08     2.54    17.39    31.62  
			 min_DR_adj_price_50kg |      550    22.02     3.30    17.39    42.04  
			 min_IM_adj_price_50kg |      550    22.38     3.14    18.26    42.48  
		*/
	compress 
	
	tempfile kilimanjaroViltoAV
	save `kilimanjaroViltoAV'
	
